## Daniel J. Mallinson
## Schoolyard Politics R script
## 05-23-2016


rm(list = ls(all = TRUE)) # Remove old items from R memory

library(foreign)

pooled.data <- read.csv("cross_sec_data_rep.csv")

data <- pooled.data #Create working dataset

###########################################################
####### Summary of Legislative Components (Table 1) #######
###########################################################

colSums(data[,4:19])

###########################################################
############# Tetrachoric correlations (SI) ###############
###########################################################

## Tetrachoric correlations
library(psych)
tetrachoric(data[,4:19])
tetrachoric(data[,4:19], correct=0) #Same Thresholds, but larger correlations

###########################################################
############### Latent Trait Analysis #####################
###########################################################
#install.packages("ltm") #Uncomment to install ltm package
library(ltm) # Load package for latent variable modeling

latent <- data[,4:19] # Pulls indicators for each component

descript(latent) #Descriptives for response data

fit1 <- rasch(latent, constraint=cbind(length(latent)+1,1)) #constrains discriminant to 1
summary(fit1)
coef(fit1, prob=T, order=T)
GoF.rasch(fit1, B=200)
margins(fit1)
margins(fit1, type="three-way", nprint=2)

##################### Best Fit ############################

fit2 <- rasch(latent) # unconstrained
summary(fit2) #discriminant is definitely different than 1
coef(fit2, prob=TRUE, order=TRUE)
margins(fit2, type="three-way", nprint=2)
GoF.rasch(fit2, B=200)
anova(fit1, fit2)  # fit 2 is better, but still has some problems in three-way margins


###########################################################

fit3 <- ltm(latent~z1) # adds discriminant for each component
summary(fit3)
anova(fit2, fit3) # Fit 2 is still better

fit4 <- tpm(latent, type="rasch", max.guessing=1) # includes guessing parameter in unconstrained Rasch
summary(fit4)
anova(fit2, fit4) # Fit 2 is still better

#################### Create plots #########################

# Plot item response category characteristic curves for fit2 (Supplementary Materials)

library("RColorBrewer")
setEPS()
postscript("comprehensive_icc.eps")
#labels <- c("Purpose", "Scope", "Prohibited Behavior", "Enumerated Groups", "Dist Policy", "Dist Policy Review", "Definitions", "Reporting", "Investigations", "Written Records", "Consequences", "Mental Health", "Communications", "Training", "Monitoring", "Legal Remidies")
#color<-colorRampPalette(brewer.pal(12,"Paired"))(16)
color <- c("black", "blueviolet", "red", "darkgoldenrod", "blueviolet", "darkgreen", "deeppink4", "orange1", "seagreen", "violetred", "yellowgreen", "slategrey", "slateblue4", "saddlebrown", "mediumblue", "magenta")
plot(fit2, type="ICC", legend=FALSE, labels=labels, col=color, xlim=c(-4.5, 4.5), ylim=c(-0.05,1.05), annot=FALSE, lwd=2) 
#abline(v = -4:4, h = seq(0, 1, 0.2), col = "lightgray", lty = "dotted")
#textx <- c(-3.8, -3.6, -3.4, -3.2, -3.0, -2.8, -2.6, -2.4, -2.2, -2.0, -1.8, -1.6, -1.4, -1.2, -1.0, -0.8)
#texty <- c(0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85)
#labels <- c("Scope", "District Policy", "
#text(textx, texty, col=color)
dev.off()

# Plot item information curves for fit2 (Supplementary Materials)
setEPS()
postscript("comprehensive_iic.eps")
plot(fit2, type="IIC", labels=labels, legend=TRUE, col=color)
dev.off()

################## Create Factor scores ###################

set.seed(123) #Set a seed so the scores are replicable

factor2 <- factor.scores(fit2, resp.patterns= latent, method="MI") #use MI because of small sample properties of data (Rizopoulos and Moustaki 2008)
factor2 #Print the factor object to screen

## Correlations with factor scores and Department of Education's expansiveness scores
#install.packages("Hmisc") #Uncomment to install Hmisc package
library(Hmisc)
cor.test(data$expansiveness, factor2$score.dat$z1) # 0.91, p < 0.001

#Add comprehensiveness score to dataset
data$comprehensive <- matrix(c(factor2$score.dat$z1), ncol=1)
#Add comprehensiveness standard error to dataset
data$comprehensive_se <- matrix(c(factor2$score.dat$se.z1),ncol=1)
#Add comprehensiveness standard deviation to dataset
data$comprehensive_sd <- matrix(c(factor2$score.dat$se.z1),ncol=1)*sqrt(50)

################## Rescale Covariates ######################

#Rescale some covariates for analysis
data$percap <- data$PerCap10/1000 # Per Capita Income measured in thousands
data$squire <- data$squire*100 
data$edbudget <- data$edbudget*100 # Now measured in percentage instead of proportion
data$pctminority <- data$pctblack + data$pctlatino # combine pct black and pct latino to save degrees of freedom

data$adopt <- 0
data$adopt[which(data$total>0)] <- 1

######## Create neighbor comprehensiveness measure ########

neighbor <- read.csv("neighbor_raw.csv") #Read in spreadsheet of contiguous neighbors

neighbor_comp <- NULL #Create blank object for neighbor calculation

for(i in 1:nrow(data)){
	use.obs <- data[i,]
	if(use.obs$icpsr %in% c(81,82)){
		neighbor_comp[i] <- NA
	}
	else{
	obs.neighbor <- neighbor[which(neighbor$state_icpsr == use.obs$icpsr),]
	use.icpsr <- as.vector(obs.neighbor$neighbor_icpsr)
	use.data <- data[data$icpsr %in% use.icpsr,]
	neighbor_comp[i] <- mean(use.data$comprehensive)	
}}

data$neighbor_comp <- neighbor_comp

################## Output Dataset #########################

write.dta(data, file="cross_sect_w_comprehensive.dta")
write.csv(data, file="cross_sect_w_comprehensive.csv")

###########################################################
########## Analysis of comprehensiveness score ############
###########################################################


rm(list = ls(all = TRUE)) # Remove old items from R memory

library(foreign)

comp <- read.dta("cross_sect_w_comprehensive.dta")
#comp <- read.csv("cross_sect_w_comprehensive.csv")

data <- comp # reset working dataset

#Add Count of time since first adoption to dataset
data$time <- data$year - 1999
data$time[26] <- 13 #Set Montana to 13 years


###########################################################
######## Plot of cumulative adoptions (Figure 2) ##########
###########################################################

#install.packages("doBy")  # Uncomment to install doBy package
library(doBy)

use.data <- cbind(data$year, data$adopt) # Pull data for counting
use.data <- use.data[which(is.na(use.data[,1])==FALSE),] #Remove NAs
annual.sum <- aggregate(use.data[,2]~use.data[,1], FUN=sum)

keep <- matrix(NA, nrow=nrow(annual.sum),ncol=1)

for(i in 1:nrow(annual.sum)){
	if (i==1){
	keep[i,] <- annual.sum[i,2]	
	}
	else{
	keep[i,] <- keep[i-1,] + annual.sum[i,2]	
	}
}

annual.sum <- cbind(annual.sum,keep)
colnames(annual.sum) <- c("year", "annual_total", "cumulative_total")
annual.sum$cumulative_percent <- (annual.sum$cumulative_total/50)*100

#Split data based on whether state has enhanced their anti-bullying policies since the initial adoption date
reinvent <- data[which(data$code %in% c("AZ", "AR", "CA", "CO","CT", "GA", "ID", "IL", "IN", "KS", "LA", "MD", "MS", "MO", "NE", "NV", "NH", "NJ", "NM", "OH", "OK", "OR", "RI", "TN", "TX", "UT", "VT", "VA", "WA", "WV")),]

`%notin%` <- function(x,y) !(x %in% y)
no.reinvent <- data[which(data$code %notin% c("AZ", "AR", "CA", "CO","CT", "GA", "ID", "IL", "IN", "KS", "LA", "MD", "MS", "MO", "NE", "NV", "NH", "NJ", "NM", "OH", "OK", "OR", "RI", "SC", "TN", "TX", "UT", "VT", "VA", "WA", "WV")),]


###### Figure 2 ######

setEPS()
postscript("adoptions_over_time_alt.eps")
#pdf("adoptions_over_time_alt.pdf")
plot.new()
par(mar=c(4,5,1,5))
plot.window(xlim=c(1999,2012.5), ylim=c(-2.0, 2.0))
points(x=reinvent$year, y=jitter(reinvent$comprehensive, factor=2.5), pch=17, col="red")
points(x=no.reinvent$year, y=jitter(no.reinvent$comprehensive, factor=2.5), pch=15, col="black")
text(x=reinvent$year, y=jitter(reinvent$comprehensive, factor=2.5), reinvent$code, cex=0.6, pos=4, col="red")
text(x=no.reinvent$year, y=jitter(no.reinvent$comprehensive, factor=2.5), no.reinvent$code, cex=0.6, pos=4, col="black")
axis(1, at=c(1999:2012), labels=c(1999,"", 2001,"", 2003,"", 2005,"", 2007,"", 2009,"", 2011, 2012), cex.axis=.8)
axis(2, at=c(-2.0, 2.0), labels=c("Minimum", "Maximum"), las=2, cex.axis=.8)
mtext("Comprehensiveness of State Bullying Policy", side=2, line=2)
mtext("Year of Initial Adoption", side=1, line=2.5)
dev.off()

###########################################################
############ Summary statistics (Table A1) ##############
###########################################################

#install.packages("psych") # Uncomment to install psych package
library(psych)

describe(data[c("comprehensive", "neighbor_comp", "fire13", "freedom_personal", "percap", "edbudget", "squire", "disabled", "lgbt_house", "pctminority", "time")], skew=FALSE, ranges=FALSE)

###########################################################
############## Model of Comprehensiveness #################
###########################################################

#Base Model (Model 1)

base.model <- lm(comprehensive ~ neighbor_comp + fire13 + percap + squire + disabled + lgbt_house + pctminority + time, data=data)
summary(base.model)

#Enumerated Groups (Model 2)

enum.model <- glm(enum_groups ~ neighbor_comp + fire13 + percap + squire + disabled + lgbt_house + pctminority + time, family=binomial(link="logit"), data=data)
summary(enum.model)

#Education Expenditures (Model 3)
edu.model <- lm(comprehensive ~ neighbor_comp + fire13 + edbudget + squire + disabled + lgbt_house + pctminority + time, data=data)
summary(edu.model)

#General Personal Freedoms (Model 4)
freedom.model <- lm(comprehensive ~ neighbor_comp + freedom_personal + percap + squire + disabled + lgbt_house + pctminority + time, data=data)
summary(freedom.model)

#No Promo Homo (Model 5)
nph.model <- lm(comprehensive ~ neighbor_comp + fire13 + percap + squire + disabled + lgbt_house + pctminority + nopromohomo + time, data=data)
summary(nph.model)

#Model of Dept of Ed Expansiveness Score (Not Included)
exp.model <- lm(expansiveness ~ fire13 + percap + squire + disabled + lgbt_house + pctminority, data=data)
summary(exp.model)


###########################################################
############### Expected Comprehensiveness ################
###########################################################

#install.packages("Hmisc") #Uncommment to install Hmisc package
library(Hmisc)

###################### Neighbor ###########################

neigh.seq <- seq(from=min(data$neighbor_comp, na.rm=TRUE), to=max(data$neighbor_comp, na.rm=TRUE), by=0.05)

neigh.frame <- data.frame(
neighbor_comp=neigh.seq, 
fire13=mean(data$fire13), 
percap=mean(data$percap), 
squire=mean(data$squire), 
disabled=mean(data$disabled), 
lgbt_house=mean(data$lgbt_house), 
pctminority=mean(data$pctminority),
time=mean(data$time))

neigh.predict <- predict(base.model, newdata=neigh.frame, interval="confidence", level=0.95)

#Calculate min to max effect
`%notin%` <- function(x,y) !(x %in% y)
(max(neigh.predict[,1]) - min(neigh.predict[,1]))/sd(data$comprehensive[which(data$code %notin% c("AK", "HI"))]) # 2.03 sd decline in comprehensiveness

min(data$neighbor_comp[which(data$code %notin% c("AK", "HI"))]) # -2.09
max(data$neighbor_comp[which(data$code %notin% c("AK", "HI"))]) # 0.98

################# Per Capita Income #######################

income.seq <- seq(from=min(data$percap), to=max(data$percap), by=0.1)

income.frame <- data.frame(
neighbor_comp=mean(data$neighbor_comp, na.rm=TRUE), 
fire13=mean(data$fire13), 
percap=income.seq, 
squire=mean(data$squire), 
disabled=mean(data$disabled), 
lgbt_house=mean(data$lgbt_house), 
pctminority=mean(data$pctminority),
time=mean(data$time))

income.predict <- predict(base.model, newdata=income.frame, interval="confidence", level=0.95)

#Calculate min to max effect
(max(income.predict[,1]) - min(income.predict[,1]))/sd(data$comprehensive[which(data$code %notin% c("AK", "HI"))]) #1.72 SD increase

min(data$percap[which(data$code %notin% c("AK", "HI"))]) # 28,066
max(data$percap[which(data$code %notin% c("AK", "HI"))]) # 50,398

################# Disabled Students ######################

disabled.seq <- seq(from=min(data$disabled), to=max(data$disabled), by=0.1)

disabled.frame <- data.frame(
neighbor_comp=mean(data$neighbor_comp, na.rm=TRUE), 
fire13=mean(data$fire13), 
percap=mean(data$percap), 
squire=mean(data$squire), 
disabled=disabled.seq, 
lgbt_house=mean(data$lgbt_house), 
pctminority=mean(data$pctminority),
time=mean(data$time))

disabled.predict <- predict(base.model, newdata=disabled.frame, interval="confidence", level=0.95)

#Calculate min to max effect
(max(disabled.predict[,1]) - min(disabled.predict[,1]))/sd(data$comprehensive[which(data$code %notin% c("AK", "HI"))]) #2.23 sd increase

min(data$disabled[which(data$code %notin% c("AK", "HI"))]) # 4.4
max(data$disabled[which(data$code %notin% c("AK", "HI"))]) # 9.4

################## LGBT Households ######################

lgbt.seq <- seq(from=min(data$lgbt_house), to=max(data$lgbt_house), by=0.1)

lgbt.frame <- data.frame(
neighbor_comp=mean(data$neighbor_comp, na.rm=TRUE), 
fire13=mean(data$fire13), 
percap=mean(data$percap), 
squire=mean(data$squire), 
disabled=mean(data$disabled), 
lgbt_house=lgbt.seq, 
pctminority=mean(data$pctminority),
time=mean(data$time))

lgbt.predict <- predict(base.model, newdata=lgbt.frame, interval="confidence", level=0.95)

#Calculate min to max effect
(max(lgbt.predict[,1]) - min(lgbt.predict[,1]))/sd(data$comprehensive[which(data$code %notin% c("AK", "HI"))]) #1.40 sd increase

min(data$lgbt_house[which(data$code %notin% c("AK", "HI"))]) # 1.99
max(data$lgbt_house[which(data$code %notin% c("AK", "HI"))]) # 8.36

################## Percent Minorities ###################

minority.seq <- seq(from=min(data$pctminority), to=max(data$pctminority), by=0.1)

minority.frame <- data.frame(
neighbor_comp=mean(data$neighbor_comp, na.rm=TRUE), 
fire13=mean(data$fire13), 
percap=mean(data$percap), 
squire=mean(data$squire), 
disabled=mean(data$disabled), 
lgbt_house=mean(data$lgbt_house), 
pctminority=minority.seq,
time=mean(data$time))

minority.predict <- predict(base.model, newdata=minority.frame, interval="confidence", level=0.95)

#Calculate min to max effect
(max(minority.predict[,1]) - min(minority.predict[,1]))/sd(data$comprehensive[which(data$code %notin% c("AK", "HI"))]) #1.48 sd increase

min(data$pctminority[which(data$code %notin% c("AK", "HI"))]) # 2.5
max(data$pctminority[which(data$code %notin% c("AK", "HI"))]) # 49.4

######## Plot the Predicted Values (Figure 3) #########

## Source code http://faculty.washington.edu/cadolph/vis/iverbase.r



setEPS()
postscript("predicted_values.eps")
#pdf("predicted_values.pdf")
layout.matrix <- rbind(c(1,1,2,2), c(3,3,4,4), c(6,5,5,7)) #Create matrix for laying out plots
layout(layout.matrix) #Creates grid in plot window based on the layout matrix
#layout.show(7) #uncomment to show the seven panels in your plot window. Useful for re-arranging plots

#par(mfrow=c(2,2), mar=c(5,4,2,2))
par(mar=c(5,4,2,2))

plot.new()
plot.window(xlim=c(-2,1), ylim=c(-2.5,2.5))
#matplot(minority.frame$pctminority, minority.predict, lty=c(1,2,2), type="l", col=c("Black", "Firebrick", "Firebrick"), ylab="Predicted Comprehensiveness", xlab="Percentage Black and Hispanic Population", ylim=c(-2,2), xlim=c(0,50), axes=FALSE, bty="n")
polygon(c(neigh.seq, rev(neigh.seq), neigh.seq[1]), c(neigh.predict[,2], rev(neigh.predict[,3]),neigh.predict[1,2]), col="gray", border=FALSE)
lines(x=neigh.seq, y=neigh.predict[,1], col="black")
lines(x=neigh.seq, y=neigh.predict[,2], col="black")
lines(x=neigh.seq, y=neigh.predict[,3], col="black")
axis(1, at=seq(from=-2, to=1, by=.5), labels=seq(from=-2,to=1,by=0.5) )
axis(2, at=seq(from=-2.5,to=2.5,by=1), labels=seq(from=-2.5,to=2.5,by=1))
minor.tick(nx=2,ny=2,tick.ratio=0.5)
title(main="", xlab="Average Neighbor Comprehensiveness", ylab="Expected Comprehensiveness")
mtext(text=c("(a)"), side=3)

plot.new()
plot.window(xlim=c(28,50), ylim=c(-2.5,2.5))
#matplot(income.frame$percap, income.predict, lty=c(1,1,1), type="l", col=c("black", "black", "black"), ylab="Predicted Comprehensiveness", xlab="Per Capita Income ($1,000s)",ylim=c(-2,2), xlim=c(28,50), axes=FALSE, bty="n", plot=FALSE)
polygon(c(income.seq, rev(income.seq), income.seq[1]), c(income.predict[,2], rev(income.predict[,3]),income.predict[1,2]), col="gray", border=FALSE)
lines(x=income.seq, y=income.predict[,1], col="black")
lines(x=income.seq, y=income.predict[,2], col="black")
lines(x=income.seq, y=income.predict[,3], col="black")
axis(1, at=seq(from=30,to=50, by=5), labels=seq(30,to=50,by=5))
axis(2, at=seq(from=-2.5,to=2.5,by=1), labels=seq(from=-2.5,to=2.5,by=1))
minor.tick(nx=2,ny=2,tick.ratio=0.5)
title(main="", xlab="Per Capita Income ($1,000s)", ylab="Expected Comprehensiveness")
mtext(text=c("(b)"), side=3)

plot.new()
plot.window(xlim=c(4,10), ylim=c(-2.5,2.5))
#matplot(disabled.frame$disabled, disabled.predict, lty=c(1,2,2), type="l", col=c("Black", "Firebrick", "Firebrick"), ylab="Predicted Comprehensiveness", xlab="Percentage Disabled Students", ylim=c(-2,2), xlim=c(4,10), axes=FALSE, bty="n")
polygon(c(disabled.seq, rev(disabled.seq), disabled.seq[1]), c(disabled.predict[,2], rev(disabled.predict[,3]),disabled.predict[1,2]), col="gray", border=FALSE)
lines(x=disabled.seq, y=disabled.predict[,1], col="black")
lines(x=disabled.seq, y=disabled.predict[,2], col="black")
lines(x=disabled.seq, y=disabled.predict[,3], col="black")
axis(1, at=seq(from=4,to=10, by=1), labels=seq(from=4,to=10,by=1))
axis(2, at=seq(from=-2.5,to=2.5,by=1), labels=seq(from=-2.5,to=2.5,by=1))
minor.tick(nx=2,ny=2,tick.ratio=0.5)
title(main="", xlab="Percentage Disabled Students", ylab="Expected Comprehensiveness")
mtext(text=c("(c)"), side=3)

plot.new()
plot.window(xlim=c(2,9), ylim=c(-2.5,2.5))
#matplot(lgbt.frame$lgbt_house, lgbt.predict, lty=c(1,2,2), type="l", col=c("Black", "Firebrick", "Firebrick"), ylab="Predicted Comprehensiveness", xlab="Percentage LGBT Households", ylim=c(-2,2), xlim=c(2,9), axes=FALSE, bty="n")
polygon(c(lgbt.seq, rev(lgbt.seq), lgbt.seq[1]), c(lgbt.predict[,2], rev(lgbt.predict[,3]),lgbt.predict[1,2]), col="gray", border=FALSE)
lines(x=lgbt.seq, y=lgbt.predict[,1], col="black")
lines(x=lgbt.seq, y=lgbt.predict[,2], col="black")
lines(x=lgbt.seq, y=lgbt.predict[,3], col="black")
axis(1, at=seq(from=2.0,to=9, by=1), labels=seq(from=2,to=9,by=1))
axis(2, at=seq(from=-2.5,to=2.5,by=1), labels=seq(from=-2.5,to=2.5,by=1))
minor.tick(nx=2,ny=2,tick.ratio=0.5)
title(main="", xlab="Percentage LGBT Households", ylab="Expected Comprehensiveness")
mtext(text=c("(d)"), side=3)

plot.new()
plot.window(xlim=c(0,50), ylim=c(-2.5,2.5))
#matplot(minority.frame$pctminority, minority.predict, lty=c(1,2,2), type="l", col=c("Black", "Firebrick", "Firebrick"), ylab="Predicted Comprehensiveness", xlab="Percentage Black and Hispanic Population", ylim=c(-2,2), xlim=c(0,50), axes=FALSE, bty="n")
polygon(c(minority.seq, rev(minority.seq), minority.seq[1]), c(minority.predict[,2], rev(minority.predict[,3]),minority.predict[1,2]), col="gray", border=FALSE)
lines(x=minority.seq, y=minority.predict[,1], col="black")
lines(x=minority.seq, y=minority.predict[,2], col="black")
lines(x=minority.seq, y=minority.predict[,3], col="black")
axis(1, at=seq(from=0,to=50, by=10), labels=seq(from=0,to=50, by=10))
axis(2, at=seq(from=-2.5,to=2.5,by=1), labels=seq(from=-2.5,to=2.5,by=1))
minor.tick(nx=2,ny=2,tick.ratio=0.5)
title(main="", xlab="Percentage Black and Hispanic Population", ylab="Expected Comprehensiveness")
mtext(text=c("(e)"),side=3)
dev.off()

###########################################################
###### Comparison of no-promo-homo laws (Figure 4) ########
###########################################################

#subset based on noprohomo
nopromohomo1 <- data[which(data$nopromohomo==1),]
nrow(nopromohomo1) # 9 states
mean(nopromohomo1$comprehensive) # 0.03
sd(nopromohomo1$comprehensive) # 0.50
nopromohomo0 <- data[which(data$nopromohomo==0),]
nrow(nopromohomo0) # 41 states
mean(nopromohomo0$comprehensive) # 0.02
sd(nopromohomo0$comprehensive) # 0.95
t.test(nopromohomo1$comprehensive, nopromohomo0$comprehensive) #t(50) = 0.07, p = 0.95

setEPS()
postscript("boxplot.eps")
#pdf("boxplot.pdf")
par(mar=c(5,5,0,1))
boxplot(comprehensive~nopromohomo, ylim=c(-3,2), outline=TRUE, data=data, axes=FALSE, varwidth=TRUE, boxwex=.5, col=c("#d8b365", "#5ab4ac"), outpch=16, horizontal=TRUE)
axis(2, at=c(1,2), labels=c("No", "Yes"), lty=0)
axis(1, at=c(-3,-2,-1,0,1,2), labels=c(-3, -2,-1,0,1,2))
#legend(2,2, c("No", "Yes"), col=c("red", "blue"), fill=c("red", "blue"))
title(main="", ylab="Presence of State \"No Promo Homo\" Laws", xlab="Comprehensiveness of Anti-bullying Statute", cex.lab=0.85)
dev.off()